package com.xxx.tika.controller;

import org.apache.tika.Tika;
import org.apache.tika.exception.TikaException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Controller;
import org.springframework.validation.annotation.Validated;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.ResponseBody;

import javax.annotation.Resource;
import java.io.File;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;

/**
 * Demo controller that uses Apache Tika to read the text content of
 * documents such as pdf, docx and xlsx files.
 */
@Validated
@Controller
@RequestMapping(value = "/", produces = "application/json;charset=utf-8")
public class DemoController {

    private static final Logger logger = LoggerFactory.getLogger(DemoController.class);

    /** Location (drive/directory) that holds the sample document. */
    private static final String FILE_DIR = "D:";

    /**
     * Name of the sample document to parse. Other sample names such as
     * {@code test.docx} or {@code test.xlsx} can be substituted here.
     */
    private static final String FILE_NAME = "test.pdf";

    @Resource
    private Tika tika;

    /**
     * Parses the sample document with the injected {@link Tika} instance and
     * logs the extracted text.
     *
     * @return the literal string {@code "success"} once parsing has completed
     * @throws TikaException if Tika fails while parsing the document
     * @throws IOException   if the document cannot be read
     */
    @RequestMapping("/test")
    @ResponseBody
    public String test() throws TikaException, IOException {
        logger.info("enter method test().");

        // Build the full path of the document and obtain a File handle for Tika.
        Path path = Paths.get(FILE_DIR, FILE_NAME);
        File file = path.toFile();

        // Extract the document's text content.
        String content = tika.parseToString(file);

        // Emit through the logger (not System.out) so the output honours the
        // application's logging configuration.
        logger.info("parsed content of {}:{}{}", path, System.lineSeparator(), content);
        return "success";
    }

}
